This notebook is a comprehensive summary of the findings of this project. It is broken into 4 parts.
0.1 Load Libraries
0.2 Preprocess dataset
0.3 Examples from the dataset and ground-truth labels.
1.1 List of handcrafted features
1.2 Visualization of handcrafted features
1.3 Visualization of learned features
2.1 Rank handcrafted features using SHAP values
2.2 Rank handcrafted features using single-feature SVM
2.3 Rank handcrafted features using feature importance from Decision Tree
2.4 Combine all rankings
2.5 Rank handcrafted feature subgroups
2.6 Emotion-class-specific ranking of feature subgroups
3.1 Correlation between single handcrafted features and learned features
3.2 Correlation between handcrafted feature subgroups and learned features
# IPython magics: change into the project root so the relative "datasets/..."
# paths below resolve. NOTE(review): machine-specific path -- you'll need to
# change this path to your own checkout location.
%cd /Users/wenxindong/Desktop/affective_features/affective_features/
%pwd
import seaborn as sn
import matplotlib.pyplot as plt
import pandas as pd
from feature_analysis.visualize_feature_vectors import plot, plot3, tsne,summary_plot
from feature_analysis.correlate_handcraft_with_learned_features import linearReg, cca
from feature_analysis.util import replaceNaNs, subsample, load_npy_feature_dictionary, layer_names, drop_class, pca_handcrafted_feature,load_datasets, pca, svm, svc, random_forest,tree, linear_lr, nn, scale_input, confusion_matrix_plot, predict_score
from feature_analysis.hand_feature_keys import HANDCRAFT_FEATURES, HANDCRAFT_FEATURES_SUBGROUPS, LOW_LEVEL_FEATURES_SUBGROUPS, HIGH_LEVEL_FEATURES_SUBGROUPS
from feature_analysis.correlate_handcraft_with_learned_features import linearReg, cca_variance_analysis, canonical_communality_coef
from feature_analysis.feature_performance import evaluate_mlp
import numpy as np
from sklearn.preprocessing import StandardScaler
import torch
from sklearn.metrics import explained_variance_score
from artemis.emotions import ARTEMIS_EMOTIONS
from artemis.in_out.basics import create_dir, create_logger
from sklearn.decomposition import PCA
from sklearn import metrics
from sklearn.metrics import explained_variance_score
import seaborn as sns
from IPython.display import display
from sklearn.inspection import permutation_importance
from PIL import Image
import shap
%load_ext autoreload
%autoreload 2
'''load labels, feature vectors, and learned representation for dominant emotion images'''
'''we use the 34k dominant dataset (instead of the 80k full dataset), because dominant dataset has a majority emotion reponse, which can be used for binary-classification'''
# Paths to precomputed labels / feature matrices, relative to the project root
# set by the %cd magic above.
# NOTE(review): the '+' in the train-labels filename is inconsistent with the
# '_' used by every other path -- confirm the file on disk is really named
# "labels_dominant+train.pt" before "fixing" it.
LABELS_TRAIN = "datasets/labels_dominant+train.pt"
LABELS_TEST = "datasets/labels_dominant_test.pt"
# Learned features: ResNet-34 trained from scratch (no pretrained weights).
LEARN_TRAIN = "datasets/no_pretrain_resnet34_train.npy"
LEARN_TEST = "datasets/no_pretrain_resnet34_test.npy"
# Handcrafted ("theory") features for the dominant-emotion subset.
THEORY_TRAIN = "datasets/combined_dominant_train.npy"
THEORY_TEST = "datasets/combined_dominant_test.npy"
# Baseline deep-feature sets for comparison.
ALEX_TRAIN = "datasets/alexnettrain.npy"
ALEX_TEST = "datasets/alexnettest.npy"
VGG16_TRAIN = "datasets/vgg16train.npy"
VGG16_TEST = "datasets/vgg16test.npy"
# Extra handcrafted-feature families concatenated by preprocess().
BBOX_TRAIN = "datasets/bbox_train.npy"
BBOX_TEST = "datasets/bbox_test.npy"
UNLEARNED_SEMANTICS_TRAIN = "datasets/unlearned_semantics_train.npy"
UNLEARNED_SEMANTICS_TEST = "datasets/unlearned_semantics_test.npy"
# One image path per training example, aligned with the train matrices.
FILENAMES_TRAIN = [f.strip() for f in open("datasets/filenames_train.txt", "r").readlines()]
# 9 ArtEmis emotion classes; preprocess() rebinds these after dropping a class.
EMOTION_CLASSES = list(range(0, 9))
EMOTION_CLASSES_STRING = ARTEMIS_EMOTIONS
ALL_HANDCRAFT_FEATURES = HANDCRAFT_FEATURES
ALL_HANDCRAFT_FEATURES_SUBGROUPS = HANDCRAFT_FEATURES_SUBGROUPS
/Users/wenxindong/Desktop/affective_features/affective_features The autoreload extension is already loaded. To reload it, use: %reload_ext autoreload
We use a subset (30k) of the ArtEmis dataset (80k). This subset contains paintings that have a majority vote on a single emotion class. This subset allows us to predict discrete emotion labels instead of emotion distributions. Below shows a few examples from this subset:
def preprocess(sample_size, test_sample_size, pos_neg = False):
    '''Load, clean, class-balance and PCA-compress every feature set.

    :param sample_size: int, how many examples to keep per emotion class for
        the training split (subsampling balances the classes).
    :param test_sample_size: int, per-class sample count for the test split.
    :param pos_neg: bool, if True collapse labels to binary positive(1) /
        negative(0); if False keep the 8-way multiclass labels.
    :return: (learned_train, learned_test, hand_train, hand_test,
        labels_train, labels_test, alex_train, alex_test, vgg16_train,
        vgg16_test, filenames_train)

    Side effects: rebinds the module globals ALL_HANDCRAFT_FEATURES,
    ALL_HANDCRAFT_FEATURES_SUBGROUPS, EMOTION_CLASSES and
    EMOTION_CLASSES_STRING to reflect the dropped class and the PCA-renamed
    handcrafted feature columns.
    '''
    global ALL_HANDCRAFT_FEATURES
    global ALL_HANDCRAFT_FEATURES_SUBGROUPS
    global EMOTION_CLASSES
    global EMOTION_CLASSES_STRING
    all_hand_features_train = [THEORY_TRAIN, UNLEARNED_SEMANTICS_TRAIN, BBOX_TRAIN] #please don't change the order, as order in HANDCRAFT_FEATURES is fixed
    all_hand_features_test = [THEORY_TEST, UNLEARNED_SEMANTICS_TEST, BBOX_TEST]
    learned_train,learned_test, hand_train, hand_test, labels_train, labels_test, _,_,_ = load_datasets(LEARN_TRAIN, LEARN_TEST,all_hand_features_train, all_hand_features_test, LABELS_TRAIN, LABELS_TEST)
    # allow_pickle: the .npy files presumably contain object arrays -- TODO confirm.
    alex_train = np.load(ALEX_TRAIN, allow_pickle = True)
    alex_test = np.load(ALEX_TEST, allow_pickle = True)
    vgg16_train = np.load(VGG16_TRAIN, allow_pickle = True)
    vgg16_test = np.load(VGG16_TEST, allow_pickle = True)
    filenames_train = FILENAMES_TRAIN
    '''get rid of NaNs'''
    hand_train = replaceNaNs(hand_train)
    hand_test = replaceNaNs(hand_test)
    '''drop the emotion class "something else"(8) if we do binary classification'''
    '''drop the emotion calss "anger"(4) if we do multiclass classification '''
    dropping_class = 8 if pos_neg else 4
    # NOTE(review): this truthiness test is always True here (8 or 4); it
    # would silently skip dropping if the class to drop were ever class 0.
    if dropping_class:
        hand_train, learned_train, alex_train, vgg16_train, filenames_train, labels_train = drop_class(hand_train, [learned_train,alex_train, vgg16_train, filenames_train], labels_train, dropping_class)
        hand_test, learned_test, alex_test, vgg16_test, labels_test = drop_class(hand_test, [learned_test, alex_test, vgg16_test], labels_test, dropping_class)
        EMOTION_CLASSES = list(range(0, dropping_class)) + list(range(dropping_class+1, 9))
        EMOTION_CLASSES_STRING = [ARTEMIS_EMOTIONS[i] for i in EMOTION_CLASSES]
    assert(len(filenames_train) == len(learned_train))
    '''convert labels into positive negative'''
    '''0-3 are positive emotions, 4-7 are negative emotions, and 8 is something else'''
    if pos_neg:
        labels_train = (labels_train<4)*1 #1 = pos, 0 = neg
        labels_test = (labels_test<4)*1
    '''subsample to balance dataset'''
    print("subsample {} from each emotion class for train set".format(sample_size))
    # subsample() returns the kept row indices; reuse them so every parallel
    # array (hand/alex/vgg16/filenames) stays aligned with learned/labels.
    learned_train, labels_train, sample_idxs = subsample(sample_size, learned_train, labels_train, seed = 0)
    hand_train = hand_train[sample_idxs]
    alex_train = alex_train[sample_idxs]
    vgg16_train = vgg16_train[sample_idxs]
    filenames_train = [filenames_train[i] for i in sample_idxs]
    print("subsample {} from each emotion class for test set".format(test_sample_size))
    learned_test, labels_test, sample_idxs = subsample(test_sample_size, learned_test, labels_test, seed = 0)
    hand_test = hand_test[sample_idxs]
    alex_test = alex_test[sample_idxs]
    vgg16_test = vgg16_test[sample_idxs]
    ''' PCA high-dimensional handcrafted features'''
    print("Applying PCA to some high-dimensional handcrafted features ... ")
    # Fit PCA on train+test stacked together, then split back; the split uses
    # len(hand_train), which is unchanged by the row-count-preserving slice.
    hand_all = np.vstack([hand_train, hand_test])
    hand_all, MODIFIED_HANDCRAFT_FEATURES, MODIFIED_HANDCRAFT_FEATURES_SUBGROUPS = pca_handcrafted_feature(["artist", "bbox"], hand_all, n_components=[ 0, 25])
    hand_train = hand_all[0:len(hand_train), :]
    hand_test = hand_all[len(hand_train):, :]
    # Feature names change after PCA (e.g. "bbox PCA 0"); publish the new lists.
    ALL_HANDCRAFT_FEATURES =MODIFIED_HANDCRAFT_FEATURES
    ALL_HANDCRAFT_FEATURES_SUBGROUPS =MODIFIED_HANDCRAFT_FEATURES_SUBGROUPS
    return learned_train,learned_test, hand_train, hand_test, labels_train, labels_test, alex_train, alex_test, vgg16_train, vgg16_test, filenames_train
# Build both dataset variants. Fixed typo in the banners ("ClASSIFICATION").
# Binary: 8000/800 examples per class; multiclass: 533/65 per class.
print("BINARY CLASSIFICATION FEATURES\n")
learned_train,learned_test, hand_train, hand_test, labels_train, labels_test, alex_train, alex_test, vgg16_train, vgg16_test, filenames_train = preprocess(8000, 800, pos_neg = True)
print("MULTICLASS CLASSIFICATION FEATURES\n")
learned_train_multi,learned_test_multi, hand_train_multi, hand_test_multi, labels_train_multi, labels_test_multi, alex_train_multi, alex_test_multi, vgg16_train_multi, vgg16_test_multi, filenames_train_multi = preprocess(533, 65, pos_neg = False)
# Standardize every feature set to zero mean, unit variance
# (presumably fit on train and applied to test inside scale_input -- confirm).
hand_train, hand_test = scale_input(hand_train, hand_test, with_mean=True)
hand_train_multi, hand_test_multi = scale_input(hand_train_multi, hand_test_multi, with_mean=True)
learned_train, learned_test = scale_input(learned_train, learned_test, with_mean=True)
learned_train_multi, learned_test_multi = scale_input(learned_train_multi, learned_test_multi, with_mean=True)

# Helper lookup tables used throughout the notebook.
LEGEND_MULTI = ARTEMIS_EMOTIONS[:4] + ARTEMIS_EMOTIONS[5:]  # "anger" (4) was dropped
LEGEND_BI = ["Neg", "Pos"]
# Feature name -> column index in the handcrafted feature matrix.
FEATURE_TO_IDX = {feature: idx for idx, feature in enumerate(ALL_HANDCRAFT_FEATURES)}
# Subgroup name -> list of column indices belonging to that subgroup.
# (Idiom fix: iterate items() directly instead of enumerate() with unused idx.)
SUBGROUP_FEATURE_IDXS = dict()
for feature_group, group_features in ALL_HANDCRAFT_FEATURES_SUBGROUPS.items():
    SUBGROUP_FEATURE_IDXS[feature_group] = [feature for feature in (FEATURE_TO_IDX[f] for f in group_features if f in FEATURE_TO_IDX)]
# Convenience aggregates: all low-level / all high-level column indices.
SUBGROUP_FEATURE_IDXS["low"] = sum([SUBGROUP_FEATURE_IDXS[f] for f in LOW_LEVEL_FEATURES_SUBGROUPS.keys()], [])
SUBGROUP_FEATURE_IDXS["high"] = sum([SUBGROUP_FEATURE_IDXS[f] for f in HIGH_LEVEL_FEATURES_SUBGROUPS.keys()], [])
BINARY ClASSIFICATION FEATURES combined feature has shape (30941, 1323) combined feature has shape (3606, 1323) after dropping something else, there are 29128 examples left after dropping something else, there are 3406 examples left subsample 8000 from each emotion class for train set frequency of labels [[ 0 6632] [ 1 8000]] subsample 800 from each emotion class for test set frequency of labels [[ 0 766] [ 1 800]] Applying PCA to some high-dimensional handcrafted features ... decomposing... handcrafted features shape coming in (16198, 1323) embedded feature vectors after pca (16198, 25) variance captured: 0.9960941590882476 handcrafted features shape coming out (16198, 179) MULTICLASS ClASSIFICATION FEATURES combined feature has shape (30941, 1323) combined feature has shape (3606, 1323) after dropping anger, there are 30900 examples left after dropping anger, there are 3599 examples left subsample 533 from each emotion class for train set frequency of labels [[ 0 533] [ 1 533] [ 2 533] [ 3 533] [ 5 533] [ 6 533] [ 7 533] [ 8 533]] subsample 65 from each emotion class for test set frequency of labels [[ 0 65] [ 1 65] [ 2 65] [ 3 65] [ 5 65] [ 6 65] [ 7 65] [ 8 65]] Applying PCA to some high-dimensional handcrafted features ... decomposing... handcrafted features shape coming in (4784, 1323) embedded feature vectors after pca (4784, 25) variance captured: 0.9957754803281159 handcrafted features shape coming out (4784, 179)
# Show 5 random training examples with their ground-truth emotion labels.
np.random.seed(123)
for i in np.random.choice(4000, (5, ), replace=False):
    display(Image.open(filenames_train_multi[i]).resize((200, 200)) )
    print("label:", ARTEMIS_EMOTIONS[labels_train_multi[i]])
label: contentment
label: fear
label: disgust
label: something else
label: something else
First of all, there are two types of features: handcrafted features - features proposed by humans based on knowledge and intuition, and learned features - features automatically learned by a deep model.
Within handcrafted features, there are low-level and high-level features, as defined in literature.
Low-level features are features regarding color, texture, lines, shapes, contrast, and similarly other semantic-free elements.
High-level features are features that capture the semantic meaning and the global composition of the painting, such as bounding boxes, number of faces, genre, style, and artist.
Below lists features from the two groups. Features are grouped into subgroups within low-level and high-level features. For example, one feature subgroup is "hue".
def _report_feature_level(title, idx_key, subgroups, verbose):
    """Print one feature level's total count; verbose lists every feature per subgroup."""
    print("- {} features ({} in total):\n".format(title, len(SUBGROUP_FEATURE_IDXS[idx_key])))
    if verbose:
        for group_name in subgroups.keys():
            print(group_name, ":")
            print(ALL_HANDCRAFT_FEATURES_SUBGROUPS[group_name])
    else:
        print(subgroups.keys())

# pedantic=True lists every feature name; False prints only subgroup names.
# (The duplicated low/high print logic was factored into one helper.)
pedantic = True
_report_feature_level("Low-level", "low", LOW_LEVEL_FEATURES_SUBGROUPS, pedantic)
_report_feature_level("High-level", "high", HIGH_LEVEL_FEATURES_SUBGROUPS, pedantic)
- Low-level features (112 in total): saturation and brightness : ['mean saturation', 'mean brightness', 'pleasure', 'arousal', 'dominance'] hue : ['saturation-weighted vector-based mean hue', 'saturation-weighted angular dispersion', 'vector-based mean hue', 'angular dispersion', 'colorfulness', 'black', 'blue', 'brown', 'green', 'gray', 'orange', 'pink', 'purple', 'red', 'white', 'yellow'] texture : ['coarseness', 'contrast', 'directionality', 'wavelet(level 1) hue', 'wavelet(level 1) saturation', 'wavelet(level 1) brightness', 'wavelet(level 2) hue', 'wavelet(level 2) saturation', 'wavelet(level 2) brightness', 'wavelet(level 3) hue', 'wavelet(level 3) saturation', 'wavelet(level 3) brightness', 'wavelet(level 1)', 'wavelet(level 2)', 'wavelet(level 3)', 'GLCM contrast (hue)', 'GLCM contrast (saturation)', 'GLCM contrast (brightness)', 'GLCM correlation (hue)', 'GLCM correlation (saturation)', 'GLCM correlation (brightness)', 'GLCM energy (hue)', 'GLCM energy (saturation)', 'GLCM energy (brightness)', 'GLCM homogeneity (hue)', 'GLCM homogeneity (saturation)', 'GLCM homogeneity (brightness)'] lines : ['static absolute line slopes', 'static relative line slopes', 'length of static lines', 'dynamic absolute line slopes', 'dynamic relative line slopes', 'length of dynamic lines'] rule of third : ['mean hue of inner rectangle', 'mean saturation of inner rectangle', 'mean brightness of inner rectangle', 'ratio of wavelet coef of inner rectgl vs. image. Hue', 'ratio of wavelet coef of inner rectgl vs. image. Saturation', 'ratio of wavelet coef of inner rectgl vs. image. 
Brightness'] bilateral symmetry : ['bilateral symmetry number', 'bilateral symmetry radius', 'bilateral symmetry angle', 'bilateral symmetry strength'] rotational symmetry : ['rotational symmetry radius 1', 'rotational symmetry X coord 1', 'rotational symmetry Y coord 1', 'rotational symmetry strength 1', 'rotational symmetry radius 2', 'rotational symmetry X coord 2', 'rotational symmetry Y coord 2', 'rotational symmetry strength 2', 'rotational symmetry radius 3', 'rotational symmetry X coord 3', 'rotational symmetry Y coord 3', 'rotational symmetry strength 3', 'rotational symmetry PCA 0', 'rotational symmetry PCA 1', 'rotational symmetry PCA 2', 'rotational symmetry PCA 3', 'rotational symmetry PCA 4', 'rotational symmetry PCA 5', 'rotational symmetry PCA 6', 'rotational symmetry PCA 7', 'rotational symmetry PCA 8', 'rotational symmetry PCA 9'] radial symmetry : ['radial symmetry map distribution 0', 'radial symmetry map distribution 1', 'radial symmetry map distribution 2', 'radial symmetry map distribution 3', 'radial symmetry map distribution 4', 'radial symmetry map distribution 5', 'radial symmetry map distribution 6', 'radial symmetry map distribution 7', 'radial symmetry map distribution 8', 'radial symmetry map distribution 9', 'radial symmetry map distribution 10', 'radial symmetry map distribution 11', 'radial symmetry map distribution 12', 'radial symmetry map distribution 13', 'radial symmetry map distribution 14', 'radial symmetry map distribution 15', 'radial symmetry map distribution 16', 'radial symmetry map distribution 17', 'radial symmetry map distribution 18', 'radial symmetry map distribution 19', 'radial symmetry map distribution 20', 'radial symmetry map distribution 21', 'radial symmetry map distribution 22', 'radial symmetry map distribution 23', 'radial symmetry map distribution 24', 'radial symmetry map distribution 25', 'radial symmetry map distribution 26', 'radial symmetry map distribution 27', 'radial symmetry map distribution 
28', 'radial symmetry map distribution 29', 'radial symmetry map distribution 30', 'radial symmetry map distribution 31', 'radial symmetry map distribution 32', 'radial symmetry map distribution 33', 'radial symmetry map distribution 34', 'radial symmetry map distribution 35', 'radial symmetry PCA 0', 'radial symmetry PCA 1', 'radial symmetry PCA 2', 'radial symmetry PCA 3', 'radial symmetry PCA 4', 'radial symmetry PCA 5', 'radial symmetry PCA 6', 'radial symmetry PCA 7', 'radial symmetry PCA 8', 'radial symmetry PCA 9'] - High-level features (67 in total): genre : ['genre_is_missing', 'genre_is_portrait', 'genre_is_landscape', 'genre_is_still_life', 'genre_is_religious_painting', 'genre_is_sketch_and_study', 'genre_is_genre_painting', 'genre_is_illustration', 'genre_is_cityscape', 'genre_is_nude_painting', 'genre_is_abstract_painting'] artstyle : ['artstyle_is_Post_Impressionism', 'artstyle_is_Expressionism', 'artstyle_is_Impressionism', 'artstyle_is_Northern_Renaissance', 'artstyle_is_Realism', 'artstyle_is_Romanticism', 'artstyle_is_Art_Nouveau_Modern', 'artstyle_is_Symbolism', 'artstyle_is_Baroque', 'artstyle_is_Abstract_Expressionism', 'artstyle_is_Naive_Art_Primitivism', 'artstyle_is_Rococo', 'artstyle_is_Cubism', 'artstyle_is_Color_Field_Painting', 'artstyle_is_Pop_Art', 'artstyle_is_Pointillism', 'artstyle_is_Early_Renaissance', 'artstyle_is_Ukiyo_e', 'artstyle_is_Mannerism_Late_Renaissance', 'artstyle_is_High_Renaissance', 'artstyle_is_Minimalism', 'artstyle_is_Fauvism', 'artstyle_is_Action_painting', 'artstyle_is_Contemporary_Realism', 'artstyle_is_Synthetic_Cubism', 'artstyle_is_New_Realism', 'artstyle_is_Analytical_Cubism'] bbox : ['bbox confidence score #0', 'bbox confidence score #1', 'bbox confidence score #2', 'bbox confidence score #3', 'bbox confidence score #4', 'bbox confidence score #5', 'bbox confidence score #6', 'bbox confidence score #7', 'bbox confidence score #8', 'bbox confidence score #9', 'bbox coord xmin #0', 'bbox coord ymin #0', 
'bbox coord xmax #0', 'bbox coord ymax #0', 'bbox coord xmin #1', 'bbox coord ymin #1', 'bbox coord xmax #1', 'bbox coord ymax #1', 'bbox coord xmin #2', 'bbox coord ymin #2', 'bbox coord xmax #2', 'bbox coord ymax #2', 'bbox coord xmin #3', 'bbox coord ymin #3', 'bbox coord xmax #3', 'bbox coord ymax #3', 'bbox coord xmin #4', 'bbox coord ymin #4', 'bbox coord xmax #4', 'bbox coord ymax #4', 'bbox coord xmin #5', 'bbox coord ymin #5', 'bbox coord xmax #5', 'bbox coord ymax #5', 'bbox coord xmin #6', 'bbox coord ymin #6', 'bbox coord xmax #6', 'bbox coord ymax #6', 'bbox coord xmin #7', 'bbox coord ymin #7', 'bbox coord xmax #7', 'bbox coord ymax #7', 'bbox coord xmin #8', 'bbox coord ymin #8', 'bbox coord xmax #8', 'bbox coord ymax #8', 'bbox coord xmin #9', 'bbox coord ymin #9', 'bbox coord xmax #9', 'bbox coord ymax #9', 'bbox PCA 0', 'bbox PCA 1', 'bbox PCA 2', 'bbox PCA 3', 'bbox PCA 4', 'bbox PCA 5', 'bbox PCA 6', 'bbox PCA 7', 'bbox PCA 8', 'bbox PCA 9', 'bbox PCA 10', 'bbox PCA 11', 'bbox PCA 12', 'bbox PCA 13', 'bbox PCA 14', 'bbox PCA 15', 'bbox PCA 16', 'bbox PCA 17', 'bbox PCA 18', 'bbox PCA 19', 'bbox PCA 20', 'bbox PCA 21', 'bbox PCA 22', 'bbox PCA 23', 'bbox PCA 24'] faces and skin : ['number of frontal faces', 'relative size of the biggest face', 'number of skin pixels', 'amount of skin wrt the size of faces']
These features are a curation of features proposed in the literature.
All low-level features except symmetry features, and the "face and skins" high-level feature, come from this research paper. We adapted the code from a third-party implementation: Machajdik J, Hanbury A. Affective image classification using features inspired by psychology and art theory[C]//Proceedings of the 18th ACM international conference on Multimedia. 2010: 83-92.
Symmetry features (Bilateral, Rotational, and Radial) come from the following paper. Zhao, S., Gao, Y., Jiang, X., Yao, H., Chua, T. S., & Sun, X. (2014, November). Exploring principles-of-art features for image emotion recognition. In Proceedings of the 22nd ACM international conference on Multimedia (pp. 47-56).
High-level features:
The following shows 2D approximation of our handcrafted features. We concatenated all the handcrafted features together.
The plot on the left shows points with positive vs negative labels, and the plot on the right shows points with multi-class labels. Note that the points do not refer to the same examples, as we subsampled to create a balanced dataset.
The visualization method we use is t-SNE. t-SNE is a non-linear dimensionality reduction method, trained to preserve the local clustering of the points.
# 2-D t-SNE of the handcrafted features (PCA to 50D first), colored by binary
# labels (left plot) and multiclass labels (right plot).
print("Low-level features:")
summary_plot(tsne(pca(hand_train[:, SUBGROUP_FEATURE_IDXS["low"]], 50),2), tsne(pca(hand_train_multi[:, SUBGROUP_FEATURE_IDXS["low"]], 50),2), labels_train, labels_train_multi, ["Neg", "Pos"], ARTEMIS_EMOTIONS[:4]+ARTEMIS_EMOTIONS[5:], figsize= (15,8))
print("High-level features:")
summary_plot(tsne(pca(hand_train[:, SUBGROUP_FEATURE_IDXS["high"]], 50),2), tsne(pca(hand_train_multi[:, SUBGROUP_FEATURE_IDXS["high"]], 50),2), labels_train, labels_train_multi, ["Neg", "Pos"], ARTEMIS_EMOTIONS[:4]+ARTEMIS_EMOTIONS[5:], figsize= (15,8))
print("All handcrafted features:")
summary_plot(tsne(pca(hand_train, 50),2), tsne(pca(hand_train_multi, 50),2), labels_train, labels_train_multi, ["Neg", "Pos"], ARTEMIS_EMOTIONS[:4]+ARTEMIS_EMOTIONS[5:], figsize= (15,8))
We obtained learned features by training a ResNet-34 model from scratch, without pretrained weights. The training task was multi-class classification. The training set was the full 80K artemis dataset. The accuracy obtained by the model on the test-set was 0.435.
# t-SNE of the learned (ResNet-34) features; only 5 PCA components are kept,
# which per the remark in this notebook already capture ~99% of the variance.
summary_plot(tsne(pca(learned_train, 5),2), tsne(pca(learned_train_multi, 5),2), labels_train, labels_train_multi, ["Neg", "Pos"], ARTEMIS_EMOTIONS[:4]+ARTEMIS_EMOTIONS[5:], figsize= (15,8))
Remarks: the top 5 significant PCA components capture 99% of the learned features' (100D) variance, whereas the top 50 PCA components only capture 72% of the handcrafted features' (126D) variance.
Let's run a simple logistic regression model to see the performance of these features.
LEGEND_MULTI = ARTEMIS_EMOTIONS[:4]+ARTEMIS_EMOTIONS[5:]
LEGEND_BI = ["Neg", "Pos"]

def _fit_lr_and_report(description, X_train, y_train, X_test, y_test, max_iter):
    """Fit a logistic-regression model, print its test accuracy, return the model.

    :param description: label used in the printed accuracy line.
    :param max_iter: solver iteration cap forwarded to linear_lr.
    """
    model = linear_lr(X_train, y_train, max_iter=max_iter)
    prediction = model.predict(X_test)
    score = metrics.accuracy_score(prediction, y_test)
    print("accuracy of logistic regression model (%s): %0.3f" % (description, score))
    return model

# Compare feature sets on the binary task. The fitted models are kept because
# later cells reuse them (e.g. model_bi feeds the SHAP analysis).
# Confusion matrices can be shown via confusion_matrix_plot(model, X, y, pred, LEGEND_BI).
model_learned = _fit_lr_and_report("binary, learned features", learned_train, labels_train, learned_test, labels_test, 500)
model_bi = _fit_lr_and_report("binary, handcrafted features", hand_train, labels_train, hand_test, labels_test, 300)
model_bi_low = _fit_lr_and_report("binary, low-level features", hand_train[:, SUBGROUP_FEATURE_IDXS["low"]], labels_train, hand_test[:, SUBGROUP_FEATURE_IDXS["low"]], labels_test, 400)
model_bi_high = _fit_lr_and_report("binary, high-level features", hand_train[:, SUBGROUP_FEATURE_IDXS["high"]], labels_train, hand_test[:, SUBGROUP_FEATURE_IDXS["high"]], labels_test, 400)
accuracy of logistic regression model (binary, learned features): 0.714 accuracy of logistic regression model (binary, handcrafted features): 0.717 accuracy of logistic regression model (binary, low-level features): 0.656 accuracy of logistic regression model (binary, high-level features): 0.677
remarks:
Let's find out which handcrafted features have the highest predictive power.
We use three methods to do this.
SHAP values SHAP value is the average marginal contribution of a feature to the final prediction. "An intuitive way to understand the Shapley value is the following illustration: The feature values enter a room in random order. All feature values in the room participate in the game (= contribute to the prediction). The Shapley value of a feature value is the average change in the prediction that the feature group already in the room receives when the new feature value joins them."
Use single feature SVM to rank features in terms of prediction accuracy
Use feature importance from Decision Tree Classifiers to rank features
We combine all rankings to get the most important handcrafted features.
def getShapValues(model, X_train, X_test, background=1000, explain = 200, nsamples=1000):
    """Kernel-SHAP values for `model` on a random subset of X_test.

    :param model: fitted classifier exposing predict_proba.
    :param X_train: array from which the background (reference) rows are drawn.
    :param X_test: array from which the rows to be explained are drawn.
    :param background: number of background rows (capped at len(X_train)).
    :param explain: number of rows to explain (capped at len(X_test)).
    :param nsamples: KernelExplainer sampling budget per explained row.
    :return: (shap_values, explainer, X_explained); shap_values is indexed
        [#classes][#explained rows, #features].
    """
    shap.initjs()
    # Sample WITHOUT replacement (the original sampled with replacement, so
    # the background/explained sets could contain duplicate rows), capped so
    # small inputs don't raise.
    background_data_idxs = np.random.choice(len(X_train), size=min(background, len(X_train)), replace=False)
    test_data_idxs = np.random.choice(len(X_test), size=min(explain, len(X_test)), replace=False)
    X_explained = X_test[test_data_idxs, :]
    explainer = shap.KernelExplainer(model.predict_proba, X_train[background_data_idxs, :])
    shap_values = explainer.shap_values(X_explained, nsamples=nsamples)
    return shap_values, explainer, X_explained
# Fit the multiclass LR model, then compute SHAP values for both the binary
# and multiclass handcrafted-feature models (slow: ~30 min each per the
# recorded cell output).
model_mu = linear_lr(hand_train_multi, labels_train_multi)
shap_values_bi, explainer_bi, hand_explained_bi = getShapValues(model_bi, hand_train, hand_test) #shap_value = [#class, #explain, #features]
shap_values_mu, explainer_mu, hand_explained_mu = getShapValues(model_mu, hand_train_multi, hand_test_multi) #shap_value = [#class, #explain, #features]
Using 1000 background data samples could cause slower run times. Consider using shap.sample(data, K) or shap.kmeans(data, K) to summarize the background as K samples. 100%|██████████| 200/200 [27:54<00:00, 8.37s/it]
Using 1000 background data samples could cause slower run times. Consider using shap.sample(data, K) or shap.kmeans(data, K) to summarize the background as K samples. 100%|██████████| 200/200 [29:13<00:00, 8.77s/it]
Visualize feature ranking. The plot on the top shows the top 10 features with the largest contributions to the final prediction. The bottom plot shows the ranking of the feature subgroups in terms of the sum over the shap values of all the features in the subgroup
top_n_features = 20

def shap_ranking(shap_values, test, top_n_features, legend, features):
    """Plot the SHAP summary and return features ranked by mean |SHAP| value.

    :param shap_values: per-class list of [n_explained, n_features] arrays.
    :param test: feature matrix the SHAP values were computed on.
    :param top_n_features: how many features the summary plot displays.
    :param legend: class names for the plot legend.
    :param features: feature names, one per column of `test`.
    :return: dict mapping feature name -> mean |SHAP|, sorted descending.
    """
    plt.figure(figsize=(5,5))
    shap.summary_plot(shap_values, test, features, class_names=legend, max_display = top_n_features, show =False)
    # Mean absolute SHAP per feature, averaged over classes and explained rows.
    shap_total = np.sum(np.abs(np.array(shap_values)), axis = (0, 1))/(len(shap_values[0])*len(shap_values))
    # BUG FIX: rank against the `features` argument; the parameter was
    # previously ignored in favor of the global ALL_HANDCRAFT_FEATURES
    # (identical for current callers, but wrong for any other feature set).
    shap_ranked_features = { x:v for v, x in sorted(zip(shap_total, features), reverse = True) }
    plt.title("Features ranked by SHAP values (top {}) ".format(top_n_features))
    plt.show()
    return shap_ranked_features
# Rank handcrafted features by SHAP for both tasks; keep the full rankings
# for the combined-ranking section further down.
print("Binary Classification")
shap_ranked_features_bi = shap_ranking(shap_values_bi, hand_test, top_n_features, LEGEND_BI, ALL_HANDCRAFT_FEATURES)
print("Multiclass Classification")
shap_ranked_features_mu = shap_ranking(shap_values_mu, hand_test_multi, top_n_features, LEGEND_MULTI, ALL_HANDCRAFT_FEATURES)
Binary Classification
Multiclass Classification
To understand what SHAP values mean, we can visualize how each feature affects the prediction of a single example. If the shap value of a feature is negative, it moves the prediction line to the left, the confidence for the example belonging to the corresponding emotion class decreases, and vice-versa.
example_idx = 6  # choose a random example
# BUG FIX: the SHAP values and features below come from the MULTI split, but
# the image was opened from filenames_train (the binary split), so the wrong
# painting was displayed. Use the multiclass filenames instead.
display(Image.open(filenames_train_multi[example_idx]).resize((200, 200)) )
# Decision plot: each line traces one class's cumulative output as feature
# contributions are added in order of importance.
r = shap.multioutput_decision_plot(explainer_mu.expected_value.tolist(),
                                   shap_values_mu,
                                   example_idx,
                                   feature_names=["{}({:3f})".format(f, v) for f, v in zip(ALL_HANDCRAFT_FEATURES, hand_train_multi[example_idx, :])],
                                   feature_order='importance',
                                   highlight=[],
                                   legend_labels=LEGEND_MULTI,
                                   return_objects=True,
                                   legend_location='lower right')
print("final prediction", model_mu.predict(hand_train_multi[[example_idx],:]))
final prediction [6]
Let's use a second method to cross-validate our SHAP feature ranking.
We use single handcrafted features to predict multi-class labels. We use their accuracy on the test set to rank their predictive power.
def svm_ranking(hand_train, hand_test, labels_train, labels_test, feature_groups = None, show_single = True, show_group= True):
    """Rank single handcrafted features and feature subgroups by SVM test accuracy.

    :param feature_groups: iterable of subgroup names; defaults to all keys of
        ALL_HANDCRAFT_FEATURES_SUBGROUPS, resolved at call time (the previous
        def-time `.keys()` default froze whichever dict was bound when the
        function was defined).
    :param show_single: plot the per-feature accuracy heatmap.
    :param show_group: plot the per-subgroup accuracy heatmap.
    :return: (per-feature accuracy dict sorted descending,
              per-subgroup accuracy DataFrame sorted descending)

    NOTE: top_n_features is read from the enclosing notebook scope.
    """
    if feature_groups is None:
        feature_groups = ALL_HANDCRAFT_FEATURES_SUBGROUPS.keys()
    fig, axs = plt.subplots(1,2, figsize = (14, 5))
    svm_results = {}
    svm_results_grouped = {}
    for feature_group in feature_groups:
        handcrafted_features_idxs = SUBGROUP_FEATURE_IDXS[feature_group]
        if not len(handcrafted_features_idxs):
            continue  # subgroup has no remaining columns (e.g. fully PCA-absorbed)
        # Accuracy of the subgroup as a whole...
        model = svm(hand_train[:,handcrafted_features_idxs], labels_train)
        svm_results_grouped[feature_group] = predict_score(model, hand_test[:, handcrafted_features_idxs], labels_test)
        # ...and of each single feature inside it.
        for j in handcrafted_features_idxs:
            model = svm(hand_train[:,j].reshape(-1, 1), labels_train)
            svm_results[ALL_HANDCRAFT_FEATURES[j]] = predict_score(model, hand_test[:, j].reshape(-1, 1), labels_test)
    # Sort descending by accuracy; keep the full ranking, display only top-n.
    svm_ranked_features = dict(sorted(svm_results.items(), key = lambda item: -1* item[1]))
    svm_results = dict(sorted(svm_results.items(), key = lambda item: -1* item[1])[:top_n_features])
    svm_results_grouped = dict(sorted(svm_results_grouped.items(), key = lambda item: -1* item[1]))
    svm_results = pd.DataFrame(svm_results, index = ['Accuracy']).transpose()
    svm_results_grouped = pd.DataFrame(svm_results_grouped, index = ['Accuracy']).transpose()
    if show_single:
        heatmap = sn.heatmap(svm_results, annot=True, cmap='BrBG', ax = axs[0])
        heatmap.set_title("Features ranked by SVM prediction accuracy", fontdict={'fontsize':12}, pad=12)
    if show_group:
        heatmap = sn.heatmap(svm_results_grouped, annot=True, cmap='BrBG', ax = axs[1])
        heatmap.set_title("Feature subgroups ranked by SVM prediction accuracy", fontdict={'fontsize':12}, pad=12)
    fig.tight_layout()
    plt.show()
    return svm_ranked_features, svm_results_grouped
# Cross-validate the SHAP ranking with the single-feature SVM ranking.
print("Multiclass classification")
svm_ranked_features_mu, svm_results_grouped_mu = svm_ranking(hand_train_multi, hand_test_multi, labels_train_multi, labels_test_multi)
print("Binary classification")
svm_ranked_features_bi, svm_results_grouped_bi = svm_ranking(hand_train, hand_test, labels_train, labels_test)
Multiclass classification
Binary classification
As we see, SVM and SHAP give not-so-similar rankings. We now use a third method to cross-validate. We train a decision tree classifier and rank the features according to their feature importance value.
def tree_feature_importance(train, labels_train, test, labels_test, index):
    """Permutation feature importances of a depth-10 decision-tree classifier.

    :param train, labels_train: data the tree is fit on.
    :param test, labels_test: held-out data used for the accuracy print and
        for the permutation importance.
    :param index: feature names for the returned Series' index.
        BUG FIX: this parameter was previously accepted but ignored, leaving
        a bare RangeIndex. Iteration over the Series still yields the same
        values, so downstream zip()-based rankings are unchanged.
    :return: pd.Series of mean permutation importances, indexed by `index`.
    """
    model = tree(train, labels_train, max_depth = 10)
    print("tree classifier acc: {:.3f}".format(predict_score(model, test, labels_test)))
    importances = permutation_importance(
        model, test, labels_test, n_repeats=10, random_state=1)
    return pd.Series(importances.importances_mean, index=index)
# Third ranking method: permutation importance from a decision tree.
tree_importances_mu = tree_feature_importance(hand_train_multi, labels_train_multi, hand_test_multi, labels_test_multi, ALL_HANDCRAFT_FEATURES)
tree_importances_bi = tree_feature_importance(hand_train, labels_train, hand_test, labels_test, ALL_HANDCRAFT_FEATURES)
tree_importances_mu
tree classifier acc: 0.265 tree classifier acc: 0.679
0 -0.000577
1 0.001538
2 -0.000385
3 0.001346
4 0.031538
...
174 0.000000
175 -0.000769
176 0.000000
177 0.000000
178 0.001154
Length: 179, dtype: float64
import seaborn as sns
def get_ranking(ranked_features, subgroup=False):
    """Return each known feature's rank position within a ranking dict.

    :param ranked_features: dict ordered best-first (feature/subgroup -> score).
    :param subgroup: rank over subgroup names instead of single feature names.
    :return: list of rank positions, in canonical feature order, skipping
        names absent from `ranked_features`.
    """
    position_of = {name: pos for pos, name in enumerate(ranked_features)}
    universe = ALL_HANDCRAFT_FEATURES_SUBGROUPS.keys() if subgroup else ALL_HANDCRAFT_FEATURES
    return [position_of[name] for name in universe if name in position_of]
def get_intersection(ranked_features, top_n_features=10):
    """Return, sorted, the features appearing in the top-n of every ranking.

    `ranked_features` is a sequence of {feature: score} dicts ordered best
    first; only the first `top_n_features` keys of each dict are considered.
    """
    tops = [set(list(ranking)[:top_n_features]) for ranking in ranked_features]
    return sorted(tops[0].intersection(*tops[1:]))
# Sort tree importances (descending) into best-first {feature: importance}
# dicts, matching the format of the SVM and SHAP rankings.
tree_ranked_features_mu={ f: v for v, f in sorted(zip(tree_importances_mu, ALL_HANDCRAFT_FEATURES), key = lambda x: -x[0])}
tree_ranked_features_bi={ f: v for v, f in sorted(zip(tree_importances_bi, ALL_HANDCRAFT_FEATURES), key = lambda x: -x[0])}
# Convert every method's ranked dict into rank positions aligned on
# ALL_HANDCRAFT_FEATURES so the methods can be compared feature-by-feature.
svm_ranking_mu = get_ranking(svm_ranked_features_mu)
svm_ranking_bi = get_ranking(svm_ranked_features_bi)
tree_ranking_mu = get_ranking(tree_ranked_features_mu)
tree_ranking_bi = get_ranking(tree_ranked_features_bi)
shap_ranking_bi = get_ranking(shap_ranked_features_bi)
shap_ranking_mu = get_ranking(shap_ranked_features_mu)
# Collect the six per-method rankings; "all" is the rank sum per feature
# (a lower sum means the feature is consistently ranked stronger).
summary_ranking = {
    "SVM_bi": svm_ranking_bi,
    "SVM_mu": svm_ranking_mu,
    "Tree_bi": tree_ranking_bi,
    "Tree_mu": tree_ranking_mu,
    "SHAP_bi": shap_ranking_bi,
    "SHAP_mu": shap_ranking_mu,
}
ranking_sum = [sum([r[1][i] for r in summary_ranking.items()]) for i in range(len(ALL_HANDCRAFT_FEATURES))]
summary_ranking["all"] = ranking_sum
# Rows = methods, columns = features.
summary_pd = pd.DataFrame(summary_ranking, index = ALL_HANDCRAFT_FEATURES).transpose()
# Line plot of every method's ranking across all features, with each series
# labelled at its right-hand end instead of in a legend.
plt.style.use('default')
fig, ax = plt.subplots(figsize=(20, 50))
sns.lineplot(data=summary_pd, markers=True, dashes=False, ax = ax )
plt.title("Handcrafted feature ranking (highest at 0)")
for line, name in zip(ax.lines, summary_pd.columns.tolist()):
    y = line.get_ydata()[-1]
    x = line.get_xdata()[-1]
    if not np.isfinite(y):
        # Fall back to the last unmasked point when the final value is missing
        # (assumes the line data is a masked array in that case -- TODO confirm).
        y=next(reversed(line.get_ydata()[~line.get_ydata().mask]),float("nan"))
    if not np.isfinite(y) or not np.isfinite(x):
        continue
    # Place the series name just past the line's last point, in the line color.
    text = ax.annotate(name,
                       xy=(x, y),
                       xytext=(2, 1),
                       color=line.get_color(),
                       xycoords=(ax.get_xaxis_transform(),
                                 ax.get_yaxis_transform()),
                       textcoords="offset points")
    # Widen the x-limit so the end-of-line label is not clipped.
    text_width = (text.get_window_extent(
        fig.canvas.get_renderer()).transformed(ax.transData.inverted()).width)
    if np.isfinite(text_width):
        ax.set_xlim(ax.get_xlim()[0], text.xy[0] + text_width * 1.05)
plt.tight_layout()
plt.show()
Note how toward the bottom the lines are less messy -- powerful features are considered powerful in most methods, while less powerful features are more easily affected by method-specific differences. Out of 177 handcrafted features, these 5 have significantly larger influence over a painting’s emotion:
# Sort features by their combined rank sum (lower = stronger across methods)
# and keep the strongest 20.
ranking_sum_dict = {f: ranking_sum[i] for i, f in enumerate(ALL_HANDCRAFT_FEATURES)}
ranking_sum_dict = dict(sorted(ranking_sum_dict.items(), key=lambda x: x[1]))
top_features = list(ranking_sum_dict)[:20]
# Fix: the message previously said "top 10" while the slice took 20 features.
print("top {} features are".format(len(top_features)), top_features)
top 10 features are ['GLCM contrast (saturation)', 'colorfulness', 'black', 'genre_is_landscape', 'bbox PCA 0', 'GLCM homogeneity (saturation)', 'artstyle_is_Expressionism', 'bbox PCA 4', 'genre_is_religious_painting', 'dominance', 'angular dispersion', 'mean brightness of inner rectangle', 'yellow', 'bbox PCA 1', 'contrast', 'gray', 'GLCM correlation (hue)', 'amount of skin wrt the size of faces', 'bilateral symmetry strength', 'artstyle_is_Impressionism']
We found the most predictive handcrafted features! Let's see whether they positively or negatively affect the emotions of a painting. If there is a positive correlation between the feature value and its Shapley value for the positive class, then higher feature values correlate positively with positive emotion.
# Sign of each top feature's effect in the binary task: correlate the feature's
# raw values with its SHAP values for the positive class. A positive
# correlation means larger feature values push the prediction toward the
# positive emotion.
for feature in top_features:
    f = FEATURE_TO_IDX[feature]
    contribution = np.corrcoef(shap_values_bi[1][:, f], hand_explained_bi[:, f])[0, 1]
    print("{}'s contribution to the predicted class being positive has correlation: {:3f}".format(feature, contribution))
    # shap.summary_plot( shap_values_bi[1][:, [f]], hand_explained_bi[:, [f]], feature_names = [ALL_HANDCRAFT_FEATURES[f]])
GLCM contrast (saturation)'s contribution to the predicted class being positive has correlation: -0.976888
colorfulness's contribution to the predicted class being positive has correlation: -0.981553
black's contribution to the predicted class being positive has correlation: -0.970349
genre_is_landscape's contribution to the predicted class being positive has correlation: 0.983313
bbox PCA 0's contribution to the predicted class being positive has correlation: -0.949133
GLCM homogeneity (saturation)'s contribution to the predicted class being positive has correlation: -0.989501
artstyle_is_Expressionism's contribution to the predicted class being positive has correlation: -0.992678
bbox PCA 4's contribution to the predicted class being positive has correlation: 0.982146
genre_is_religious_painting's contribution to the predicted class being positive has correlation: -0.987980
dominance's contribution to the predicted class being positive has correlation: 0.578267
angular dispersion's contribution to the predicted class being positive has correlation: 0.975504
mean brightness of inner rectangle's contribution to the predicted class being positive has correlation: 0.987824
yellow's contribution to the predicted class being positive has correlation: 0.923121
bbox PCA 1's contribution to the predicted class being positive has correlation: 0.973904
contrast's contribution to the predicted class being positive has correlation: -0.900972
gray's contribution to the predicted class being positive has correlation: 0.856875
GLCM correlation (hue)'s contribution to the predicted class being positive has correlation: -0.698096
amount of skin wrt the size of faces's contribution to the predicted class being positive has correlation: -0.981917
bilateral symmetry strength's contribution to the predicted class being positive has correlation: -0.928883
artstyle_is_Impressionism's contribution to the predicted class being positive has correlation: 0.974844
Now let's see per-class emotion contribution of each feature.
# Per-emotion version of the sign analysis: for every top feature, correlate
# its raw values with its SHAP values for each of the 8 emotion classes.
for feature in top_features:
    col = FEATURE_TO_IDX[feature]
    print("--" * 10)
    for e in range(8):
        contribution = np.corrcoef(shap_values_mu[e][:, col], hand_explained_mu[:, col])[0, 1]
        print("{}'s contribution to the predicted class being {} has correlation: {:.3f}\n".format(feature, EMOTION_CLASSES_STRING[e], contribution))
-------------------- GLCM contrast (saturation)'s contribution to the predicted class being amusement has correlation: 0.980 GLCM contrast (saturation)'s contribution to the predicted class being awe has correlation: -0.869 GLCM contrast (saturation)'s contribution to the predicted class being contentment has correlation: -0.836 GLCM contrast (saturation)'s contribution to the predicted class being excitement has correlation: -0.457 GLCM contrast (saturation)'s contribution to the predicted class being disgust has correlation: 0.576 GLCM contrast (saturation)'s contribution to the predicted class being fear has correlation: -0.820 GLCM contrast (saturation)'s contribution to the predicted class being sadness has correlation: -0.842 GLCM contrast (saturation)'s contribution to the predicted class being something else has correlation: 0.448 -------------------- colorfulness's contribution to the predicted class being amusement has correlation: -0.930 colorfulness's contribution to the predicted class being awe has correlation: -0.457 colorfulness's contribution to the predicted class being contentment has correlation: -0.654 colorfulness's contribution to the predicted class being excitement has correlation: -0.910 colorfulness's contribution to the predicted class being disgust has correlation: 0.487 colorfulness's contribution to the predicted class being fear has correlation: 0.901 colorfulness's contribution to the predicted class being sadness has correlation: 0.731 colorfulness's contribution to the predicted class being something else has correlation: -0.063 -------------------- black's contribution to the predicted class being amusement has correlation: 0.940 black's contribution to the predicted class being awe has correlation: -0.944 black's contribution to the predicted class being contentment has correlation: -0.877 black's contribution to the predicted class being excitement has correlation: -0.854 black's contribution to the predicted class being 
disgust has correlation: -0.773 black's contribution to the predicted class being fear has correlation: 0.933 black's contribution to the predicted class being sadness has correlation: 0.455 black's contribution to the predicted class being something else has correlation: -0.491 -------------------- genre_is_landscape's contribution to the predicted class being amusement has correlation: -0.958 genre_is_landscape's contribution to the predicted class being awe has correlation: 0.971 genre_is_landscape's contribution to the predicted class being contentment has correlation: 0.980 genre_is_landscape's contribution to the predicted class being excitement has correlation: -0.905 genre_is_landscape's contribution to the predicted class being disgust has correlation: -0.922 genre_is_landscape's contribution to the predicted class being fear has correlation: -0.535 genre_is_landscape's contribution to the predicted class being sadness has correlation: -0.900 genre_is_landscape's contribution to the predicted class being something else has correlation: -0.927 -------------------- bbox PCA 0's contribution to the predicted class being amusement has correlation: 0.927 bbox PCA 0's contribution to the predicted class being awe has correlation: -0.005 bbox PCA 0's contribution to the predicted class being contentment has correlation: -0.871 bbox PCA 0's contribution to the predicted class being excitement has correlation: 0.929 bbox PCA 0's contribution to the predicted class being disgust has correlation: 0.826 bbox PCA 0's contribution to the predicted class being fear has correlation: -0.889 bbox PCA 0's contribution to the predicted class being sadness has correlation: 0.861 bbox PCA 0's contribution to the predicted class being something else has correlation: -0.887 -------------------- GLCM homogeneity (saturation)'s contribution to the predicted class being amusement has correlation: 0.969 GLCM homogeneity (saturation)'s contribution to the predicted class being awe has 
correlation: -0.936 GLCM homogeneity (saturation)'s contribution to the predicted class being contentment has correlation: -0.762 GLCM homogeneity (saturation)'s contribution to the predicted class being excitement has correlation: -0.921 GLCM homogeneity (saturation)'s contribution to the predicted class being disgust has correlation: 0.900 GLCM homogeneity (saturation)'s contribution to the predicted class being fear has correlation: -0.722 GLCM homogeneity (saturation)'s contribution to the predicted class being sadness has correlation: 0.802 GLCM homogeneity (saturation)'s contribution to the predicted class being something else has correlation: -0.365 -------------------- artstyle_is_Expressionism's contribution to the predicted class being amusement has correlation: -0.910 artstyle_is_Expressionism's contribution to the predicted class being awe has correlation: -0.942 artstyle_is_Expressionism's contribution to the predicted class being contentment has correlation: -0.903 artstyle_is_Expressionism's contribution to the predicted class being excitement has correlation: -0.918 artstyle_is_Expressionism's contribution to the predicted class being disgust has correlation: 0.924 artstyle_is_Expressionism's contribution to the predicted class being fear has correlation: 0.936 artstyle_is_Expressionism's contribution to the predicted class being sadness has correlation: 0.897 artstyle_is_Expressionism's contribution to the predicted class being something else has correlation: -0.397 -------------------- bbox PCA 4's contribution to the predicted class being amusement has correlation: 0.957 bbox PCA 4's contribution to the predicted class being awe has correlation: -0.908 bbox PCA 4's contribution to the predicted class being contentment has correlation: -0.923 bbox PCA 4's contribution to the predicted class being excitement has correlation: -0.843 bbox PCA 4's contribution to the predicted class being disgust has correlation: 0.945 bbox PCA 4's contribution to the 
predicted class being fear has correlation: -0.932 bbox PCA 4's contribution to the predicted class being sadness has correlation: 0.744 bbox PCA 4's contribution to the predicted class being something else has correlation: 0.386 -------------------- genre_is_religious_painting's contribution to the predicted class being amusement has correlation: -0.924 genre_is_religious_painting's contribution to the predicted class being awe has correlation: 0.896 genre_is_religious_painting's contribution to the predicted class being contentment has correlation: -0.929 genre_is_religious_painting's contribution to the predicted class being excitement has correlation: -0.889 genre_is_religious_painting's contribution to the predicted class being disgust has correlation: -0.913 genre_is_religious_painting's contribution to the predicted class being fear has correlation: 0.929 genre_is_religious_painting's contribution to the predicted class being sadness has correlation: 0.957 genre_is_religious_painting's contribution to the predicted class being something else has correlation: -0.558 -------------------- dominance's contribution to the predicted class being amusement has correlation: 0.937 dominance's contribution to the predicted class being awe has correlation: -0.930 dominance's contribution to the predicted class being contentment has correlation: -0.916 dominance's contribution to the predicted class being excitement has correlation: 0.817 dominance's contribution to the predicted class being disgust has correlation: -0.500 dominance's contribution to the predicted class being fear has correlation: -0.674 dominance's contribution to the predicted class being sadness has correlation: 0.484 dominance's contribution to the predicted class being something else has correlation: -0.405 -------------------- angular dispersion's contribution to the predicted class being amusement has correlation: 0.921 angular dispersion's contribution to the predicted class being awe has 
correlation: 0.918 angular dispersion's contribution to the predicted class being contentment has correlation: -0.883 angular dispersion's contribution to the predicted class being excitement has correlation: 0.727 angular dispersion's contribution to the predicted class being disgust has correlation: -0.296 angular dispersion's contribution to the predicted class being fear has correlation: -0.442 angular dispersion's contribution to the predicted class being sadness has correlation: -0.885 angular dispersion's contribution to the predicted class being something else has correlation: -0.905 -------------------- mean brightness of inner rectangle's contribution to the predicted class being amusement has correlation: 0.650 mean brightness of inner rectangle's contribution to the predicted class being awe has correlation: 0.922 mean brightness of inner rectangle's contribution to the predicted class being contentment has correlation: 0.879 mean brightness of inner rectangle's contribution to the predicted class being excitement has correlation: 0.835 mean brightness of inner rectangle's contribution to the predicted class being disgust has correlation: -0.938 mean brightness of inner rectangle's contribution to the predicted class being fear has correlation: -0.908 mean brightness of inner rectangle's contribution to the predicted class being sadness has correlation: -0.903 mean brightness of inner rectangle's contribution to the predicted class being something else has correlation: 0.798 -------------------- yellow's contribution to the predicted class being amusement has correlation: 0.419 yellow's contribution to the predicted class being awe has correlation: 0.945 yellow's contribution to the predicted class being contentment has correlation: 0.908 yellow's contribution to the predicted class being excitement has correlation: -0.904 yellow's contribution to the predicted class being disgust has correlation: 0.710 yellow's contribution to the predicted class being 
fear has correlation: -0.905 yellow's contribution to the predicted class being sadness has correlation: -0.909 yellow's contribution to the predicted class being something else has correlation: 0.840 -------------------- bbox PCA 1's contribution to the predicted class being amusement has correlation: -0.830 bbox PCA 1's contribution to the predicted class being awe has correlation: 0.905 bbox PCA 1's contribution to the predicted class being contentment has correlation: -0.868 bbox PCA 1's contribution to the predicted class being excitement has correlation: 0.921 bbox PCA 1's contribution to the predicted class being disgust has correlation: -0.943 bbox PCA 1's contribution to the predicted class being fear has correlation: 0.707 bbox PCA 1's contribution to the predicted class being sadness has correlation: -0.305 bbox PCA 1's contribution to the predicted class being something else has correlation: -0.372 -------------------- contrast's contribution to the predicted class being amusement has correlation: -0.779 contrast's contribution to the predicted class being awe has correlation: -0.398 contrast's contribution to the predicted class being contentment has correlation: 0.911 contrast's contribution to the predicted class being excitement has correlation: -0.793 contrast's contribution to the predicted class being disgust has correlation: -0.939 contrast's contribution to the predicted class being fear has correlation: 0.849 contrast's contribution to the predicted class being sadness has correlation: 0.769 contrast's contribution to the predicted class being something else has correlation: 0.600 -------------------- gray's contribution to the predicted class being amusement has correlation: 0.318 gray's contribution to the predicted class being awe has correlation: -0.838 gray's contribution to the predicted class being contentment has correlation: 0.912 gray's contribution to the predicted class being excitement has correlation: 0.811 gray's contribution to 
the predicted class being disgust has correlation: 0.433 gray's contribution to the predicted class being fear has correlation: 0.818 gray's contribution to the predicted class being sadness has correlation: 0.842 gray's contribution to the predicted class being something else has correlation: -0.930 -------------------- GLCM correlation (hue)'s contribution to the predicted class being amusement has correlation: 0.679 GLCM correlation (hue)'s contribution to the predicted class being awe has correlation: -0.798 GLCM correlation (hue)'s contribution to the predicted class being contentment has correlation: -0.840 GLCM correlation (hue)'s contribution to the predicted class being excitement has correlation: 0.559 GLCM correlation (hue)'s contribution to the predicted class being disgust has correlation: -0.909 GLCM correlation (hue)'s contribution to the predicted class being fear has correlation: 0.848 GLCM correlation (hue)'s contribution to the predicted class being sadness has correlation: 0.721 GLCM correlation (hue)'s contribution to the predicted class being something else has correlation: 0.737 -------------------- amount of skin wrt the size of faces's contribution to the predicted class being amusement has correlation: -0.962 amount of skin wrt the size of faces's contribution to the predicted class being awe has correlation: -0.364 amount of skin wrt the size of faces's contribution to the predicted class being contentment has correlation: 0.893 amount of skin wrt the size of faces's contribution to the predicted class being excitement has correlation: -0.892 amount of skin wrt the size of faces's contribution to the predicted class being disgust has correlation: 0.819 amount of skin wrt the size of faces's contribution to the predicted class being fear has correlation: 0.834 amount of skin wrt the size of faces's contribution to the predicted class being sadness has correlation: 0.849 amount of skin wrt the size of faces's contribution to the predicted 
class being something else has correlation: -0.294 -------------------- bilateral symmetry strength's contribution to the predicted class being amusement has correlation: -0.895 bilateral symmetry strength's contribution to the predicted class being awe has correlation: 0.877 bilateral symmetry strength's contribution to the predicted class being contentment has correlation: -0.446 bilateral symmetry strength's contribution to the predicted class being excitement has correlation: -0.908 bilateral symmetry strength's contribution to the predicted class being disgust has correlation: 0.217 bilateral symmetry strength's contribution to the predicted class being fear has correlation: -0.489 bilateral symmetry strength's contribution to the predicted class being sadness has correlation: 0.876 bilateral symmetry strength's contribution to the predicted class being something else has correlation: 0.840 -------------------- artstyle_is_Impressionism's contribution to the predicted class being amusement has correlation: -0.860 artstyle_is_Impressionism's contribution to the predicted class being awe has correlation: 0.773 artstyle_is_Impressionism's contribution to the predicted class being contentment has correlation: 0.969 artstyle_is_Impressionism's contribution to the predicted class being excitement has correlation: 0.936 artstyle_is_Impressionism's contribution to the predicted class being disgust has correlation: -0.945 artstyle_is_Impressionism's contribution to the predicted class being fear has correlation: -0.893 artstyle_is_Impressionism's contribution to the predicted class being sadness has correlation: -0.457 artstyle_is_Impressionism's contribution to the predicted class being something else has correlation: -0.933
We perform a similar analysis on feature groups instead of single features. This is helpful for observing the aggregate effect of feature groups, especially for high-dimensional features such as artstyle and genre, as they are one-hot encodings.
def shape_ranking_subgroups(shap_values, top_n_features, legend):
    """Aggregate mean |SHAP| per feature subgroup and draw a ranked heatmap.

    shap_values: per-class SHAP arrays -- presumably class x sample x feature;
        TODO confirm against the SHAP explainer output.
    legend: class names used as column labels next to the "All classes" column.
    Returns the subgroup-by-class DataFrame, rows sorted by overall |SHAP|.
    NOTE(review): top_n_features is currently unused.
    """
    abs_shap = np.abs(np.array(shap_values))
    n_classes = len(shap_values)
    n_samples = len(shap_values[0])
    # Mean |SHAP| of each single feature over all classes and samples.
    overall = np.sum(abs_shap, axis=(0, 1)) / (n_samples * n_classes)
    # Mean |SHAP| of each single feature within each class.
    per_class = np.sum(abs_shap, axis=1) / n_samples
    plt.figure(figsize=(8, 8))
    grouped = {}
    for group_name, group_features in ALL_HANDCRAFT_FEATURES_SUBGROUPS.items():
        cols = [FEATURE_TO_IDX[feature] for feature in group_features if feature in FEATURE_TO_IDX]
        # First entry: total over all classes; rest: per-class totals.
        grouped[group_name] = [np.sum(overall[cols])] + np.sum(per_class[:, cols], axis=1).tolist()
    grouped = dict(sorted(grouped.items(), key=lambda item: -item[1][0]))
    table = pd.DataFrame(grouped, index=["All classes"] + legend).transpose()
    heatmap = sn.heatmap(table, annot=True, cmap='BrBG', center = 0.02, vmin = 0, vmax = 0.1)
    heatmap.set_title("Feature subgroups ranked by SHAP value", fontdict={'fontsize':10}, pad=12)
    plt.show()
    return table
# Subgroup-level SHAP rankings for both tasks.
print("Ranking feature groups by the total |SHAP values| of the features in the subgroup [Binary Classification]:")
shap_ranked_subgroups_bi = shape_ranking_subgroups(shap_values_bi, 15, LEGEND_BI)
print("Ranking feature groups by the total |SHAP values| of the features in the subgroup [Multiclass Classification]:")
shap_ranked_subgroups_mu = shape_ranking_subgroups(shap_values_mu, 15, LEGEND_MULTI)
# Notebook scratch cells: inspect the grouped SVM results.
svm_results_grouped_mu.to_dict()
np.array(svm_results_grouped_bi)
# Merge the subgroup rankings from SVM accuracy and SHAP into one table;
# the summed rows combine methods (lower = stronger subgroup overall).
summary_ranking_grouped = {
    "SVM_bi": get_ranking(svm_results_grouped_bi.to_dict()['Accuracy'], subgroup = True),
    "SVM_mu": get_ranking(svm_results_grouped_mu.to_dict()['Accuracy'], subgroup = True),
    "SHAP_bi": get_ranking(shap_ranked_subgroups_bi.to_dict()["All classes"], subgroup = True),
    "SHAP_mu": get_ranking(shap_ranked_subgroups_mu.to_dict()["All classes"], subgroup = True)
}
summary_ranking_grouped["SHAP+SVM_bi"] = [i+j for i, j in zip(summary_ranking_grouped["SVM_bi"], summary_ranking_grouped["SHAP_bi"])]
summary_ranking_grouped["SHAP+SVM_mu"] = [i+j for i, j in zip(summary_ranking_grouped["SVM_mu"], summary_ranking_grouped["SHAP_mu"])]
summary_ranking_grouped["SHAP+SVM "] = [i+j for i, j in zip(summary_ranking_grouped["SHAP+SVM_bi"], summary_ranking_grouped["SHAP+SVM_mu"])]
# Rows = methods, columns = subgroups.
summary_pd = pd.DataFrame(summary_ranking_grouped, index =ALL_HANDCRAFT_FEATURES_SUBGROUPS.keys() ).transpose()
# End-labelled line plot of the subgroup rankings (each series is labelled at
# its right-hand end instead of in a legend).
# plt.figure(figsize=(10, 10))
fig, ax = plt.subplots(figsize=(10, 10))
sns.lineplot(data=summary_pd, markers=True, dashes=False, ax=ax)
# plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.title("Handcrafted feature subgroups ranking (highest at 0)")
for line, name in zip(ax.lines, summary_pd.columns.tolist()):
    y = line.get_ydata()[-1]
    x = line.get_xdata()[-1]
    if not np.isfinite(y):
        # Fall back to the last unmasked point when the final value is missing
        # (assumes the line data is a masked array in that case -- TODO confirm).
        y=next(reversed(line.get_ydata()[~line.get_ydata().mask]),float("nan"))
    if not np.isfinite(y) or not np.isfinite(x):
        continue
    # Place the series name just past the line's last point, in the line color.
    text = ax.annotate(name,
                       xy=(x, y),
                       xytext=(2, 1),
                       color=line.get_color(),
                       xycoords=(ax.get_xaxis_transform(),
                                 ax.get_yaxis_transform()),
                       textcoords="offset points")
    # Widen the x-limit so the end-of-line label is not clipped.
    text_width = (text.get_window_extent(
        fig.canvas.get_renderer()).transformed(ax.transData.inverted()).width)
    if np.isfinite(text_width):
        ax.set_xlim(ax.get_xlim()[0], text.xy[0] + text_width * 1.05)
plt.tight_layout()
plt.show()
Ranking feature groups by the total |SHAP values| of the features in the subgroup [Binary Classification]:
Ranking feature groups by the total |SHAP values| of the features in the subgroup [Multiclass Classification]:
Feature group ranking is artstyle > hue > texture = genre > bbox > rule of third > saturation and brightness > radial symmetry> rotational symmetry = bilateral symmetry > faces and skin > lines.
Remarks:
We have ranked feature groups in the previous section. Now we dive deeper and see which features are more relevant to the classification of a specific class. We do this through ranking features using one-class-versus-all SVM classification accuracy.
from collections import defaultdict
def ova_ranking(X_train, X_test, labels_train, labels_test, grouped = False, top_n_features=10):
    """One-vs-all emotion prediction accuracy per feature subgroup (or per feature).

    For each emotion class a balanced binary training set is built (all
    positives plus an equal number of sampled negatives), a linear classifier
    is fit per feature subgroup (grouped=True) or per single feature, and its
    accuracy on the binarized test labels is recorded. Draws a heatmap and
    returns the subgroup-by-class accuracy DataFrame.
    NOTE(review): top_n_features is currently unused.
    """
    svm_results = defaultdict(list)
    for i in range(9):
        if i==4: continue #skip anger
        N = len(X_train[labels_train==i])
        # NOTE(review): negative examples are index-sampled with replacement
        # via randint -- confirm this is intended rather than a permutation.
        idxs = np.random.randint(0, len(labels_train)-N, (N,))
        X_train_multi_temp = np.vstack([X_train[labels_train!=i][idxs], X_train[labels_train==i]])
        labels_train_temp = [0 for _ in range(N)] + [1 for _ in range(N)]
        # Binary ground truth for class i. Fix: this was previously computed
        # but never used -- accuracies were scored against the raw multiclass
        # labels instead of the one-vs-all labels.
        labels_test_temp = labels_test == i
        for idx, feature_group in enumerate(ALL_HANDCRAFT_FEATURES_SUBGROUPS.keys()):
            handcrafted_features_idxs = [FEATURE_TO_IDX[feature] for feature in ALL_HANDCRAFT_FEATURES_SUBGROUPS[feature_group] if feature in FEATURE_TO_IDX]
            if len(handcrafted_features_idxs)<=0:continue
            if grouped:
                model = linear_lr(X_train_multi_temp[:,handcrafted_features_idxs], labels_train_temp)
                svm_results[feature_group].append(predict_score(model, X_test[:, handcrafted_features_idxs], labels_test_temp))
            else:
                #using single handcrafted features
                for j in handcrafted_features_idxs:
                    model = linear_lr(X_train_multi_temp[:,j].reshape(-1, 1), labels_train_temp)
                    svm_results[ALL_HANDCRAFT_FEATURES[j]].append(predict_score(model, X_test[:, j].reshape(-1, 1), labels_test_temp))
        # Baseline: all features combined.
        model = linear_lr(X_train_multi_temp, labels_train_temp)
        svm_results["All combined"].append(predict_score(model, X_test, labels_test_temp))
    plt.figure(figsize=(7,7))
    svm_results = pd.DataFrame(svm_results, index = ARTEMIS_EMOTIONS[0:4]+ARTEMIS_EMOTIONS[5:]).transpose()
    # NOTE(review): the color scale was tuned for the old (buggy) accuracy
    # range; it may need re-centering now that labels are binarized.
    heatmap = sn.heatmap(svm_results, annot=True, cmap='BrBG', center = 0.1, vmin = 0, vmax = 0.2)
    heatmap.set_title("SVM One-vs-All Emotion prediction using feature groups", fontdict={'fontsize':12}, pad=12)
    plt.show()
    return svm_results
ova_results = ova_ranking(hand_train_multi, hand_test_multi, labels_train_multi, labels_test_multi, grouped = True, top_n_features=10)
remarks:
Below, we carry the same one-class-versus-all task using learned features. The result suggests that handcrafted features are not biased. Instead, some classes are intrinsically harder to predict/harder for humans to label accurately.
def ova_ranking_learned(X_train, X_test, labels_train, labels_test, grouped = False, top_n_features=10):
    """One-vs-all emotion prediction accuracy using the learned representation.

    Same balanced-binary setup as ova_ranking, but a single logistic-regression
    model is fit on the full learned feature vector per class. Draws a one-row
    heatmap and returns the per-class accuracy DataFrame.
    NOTE(review): grouped and top_n_features are unused; kept for signature
    parity with ova_ranking.
    """
    svm_results = []
    for i in range(9):
        if i==4: continue #skip anger
        N = len(X_train[labels_train==i])
        # NOTE(review): negatives sampled with replacement via randint --
        # confirm this is intended rather than a permutation.
        idxs = np.random.randint(0, len(labels_train)-N, (N,))
        X_train_multi_temp = np.vstack([X_train[labels_train!=i][idxs], X_train[labels_train==i]])
        labels_train_temp = [0 for _ in range(N)] + [1 for _ in range(N)]
        # Binary ground truth for class i. Fix: this was previously computed
        # but never used -- accuracy was scored against the raw multiclass
        # labels instead of the one-vs-all labels.
        labels_test_temp = labels_test == i
        model = linear_lr(X_train_multi_temp, labels_train_temp)
        svm_results.append(predict_score(model, X_test, labels_test_temp))
    plt.figure(figsize=(7,1))
    svm_results = pd.DataFrame(svm_results, index = ARTEMIS_EMOTIONS[0:4]+ARTEMIS_EMOTIONS[5:]).transpose()
    # NOTE(review): color scale was tuned for the old (buggy) accuracy range.
    heatmap = sn.heatmap(svm_results, annot=True, cmap='BrBG', center = 0.1, vmin = 0, vmax = 0.2)
    heatmap.set_title("Logistic Regression One-vs-All Emotion prediction using learned representation", fontdict={'fontsize':12}, pad=12)
    plt.show()
    return svm_results
r = ova_ranking_learned(learned_train_multi, learned_test_multi, labels_train_multi, labels_test_multi, grouped = False, top_n_features=10)
Emotion class "amusement", "disgust", and "something else" are harder to predict. This agrees with our instinct -- those emotions are more nuanced and subjective.
So far, we found which handcrafted features are more powerful. We used three methods to do this: SHAP, single-feature SVM, and decision tree.
Now, we investigate what a resnet34 model has learned when trained from scratch. The resnet model used randomly initialized weights, and was early-stopped using cross-validation.
We use handcrafted features as landmarks to investigate what it has learned. Does it learn more low-level or more high-level features?
We carry out two methods:
Train Multilayer Perceptron, SVM, and Decision Tree with combined learned and handcrafted features, and compare the relative improvement in prediction accuracy in all three classifiers. This gives us a rough idea for which handcrafted features provide new and useful information.
Correlate single handcrafted features with learned features using linear regression, then rank handcrafted features according to the correlation coefficient of the learned linear mappings. This gives us a rough approximation for what information is encoded in the learned features.
For feature groups, we can no longer learn just a single linear mapping since the feature groups form a N dim vector. For this reason, we use Canonical Correlation Analysis to learn N linear mappings that are uncorrelated with each other. We find the total variance explained of the N linear mappings.
def analyse_linear_combination_single_feature(learned_representations,handcraft_features, labels, learned_t, hand_t, labels_t, create_heapmap = True, top_n = None):
    '''correlation: use learned representation to predict handcrafted features'''
    '''canonical commutanity coefficient: proportion of variance in each variable that is explained by the complete canonical solution'''
    # For every single handcrafted feature, compute three quantities w.r.t. the
    # learned representation:
    #   1. |Pearson r| between the feature and its best linear reconstruction
    #      from the learned features (via linearReg).
    #   2. Its canonical communality coefficient (CCA over all features at once).
    #   3. Change in classifier accuracy (percentage points) when that single
    #      feature is concatenated onto the learned representation.
    # Arguments ending in "_t" are the held-out test split.
    # Accuracy baseline: learned representation alone.
    baseline = evaluate_mlp(learned_representations, labels, learned_t, labels_t)
    coefs = dict()
    # Canonical communality coefficient for every handcrafted feature at once.
    ccc = canonical_communality_coef(handcraft_features, learned_representations)
    # ALL_HANDCRAFT_FEATURES is a notebook-level global (list of feature names) --
    # defined in an earlier section, not imported in this file's header.
    for idx in range(len(ALL_HANDCRAFT_FEATURES)):
        # Reconstruct this one feature linearly from the learned representation.
        ori_hand, pred_hand = linearReg(learned_representations, handcraft_features[:, idx])
        correlation = np.abs(np.corrcoef(pred_hand, ori_hand, rowvar = True)[1, 0])
        N =handcraft_features[:, idx].shape[0]
        # Learned features + this single handcrafted feature (train and test).
        features_train = np.concatenate([learned_representations, handcraft_features[:, idx].reshape(N, 1)], axis = 1)
        features_test = np.concatenate([learned_t, hand_t[:,idx].reshape(hand_t[:,idx].shape[0], 1)], axis = 1)
        improvement = evaluate_mlp(features_train, labels, features_test, labels_t) - baseline
        improvement*=100  # express as percentage points
        coefs[ALL_HANDCRAFT_FEATURES[idx]]= [correlation, ccc[idx], improvement]
    # Random-noise "feature" as a benchmark for how much correlation arises by chance.
    rand_ori, rand_pred = linearReg(learned_representations, np.random.normal(-1, 1, handcraft_features[:, 0].shape))
    corr_rand = np.abs(np.corrcoef(rand_pred, rand_ori, rowvar = True)[1, 0])
    # -1 / 0 are placeholders: no communality or improvement is measured for the benchmark.
    coefs["RANDOM FEATURE FOR BENCHMARK"] = [corr_rand, -1, 0]
    if create_heapmap:
        top_n = top_n if top_n !=None else len(coefs)
        # NOTE(review): `top_n` only scales the figure height here -- the heatmap
        # still shows every feature (no [:top_n] slicing, unlike the group
        # variant). Confirm this is intended.
        plt.figure(figsize=(10, 0.25*top_n))
        # Reorder rows by `ranking_sum`, a notebook-level global computed in the
        # earlier combined-ranking section -- TODO confirm it aligns index-wise
        # with coefs.items() (zip truncates to the shorter of the two).
        coefs = dict([x for _, x in sorted(zip(ranking_sum, coefs.items()))])
        correlation_ranked_features = [item[0] for item in coefs.items()]
        coefs = pd.DataFrame(coefs, index = ['Correlation', "canonical communality coefficient", "Accuracy improvement"]).transpose()
        heatmap = sn.heatmap(coefs, vmin=0, vmax=1, annot=True, cmap='BrBG')
        heatmap.set_title("Information shared between handcrafted features and learned features", fontdict={'fontsize':12}, pad=12)
        plt.show()
    # NOTE(review): `correlation_ranked_features` is only assigned inside the
    # create_heapmap branch, so create_heapmap=False raises NameError here.
    return correlation_ranked_features
def analyse_linear_combination_feature_groups(learned_representations,handcraft_features, labels, learned_t, hand_t, labels_t, create_heapmap = True, analyse_groups = True, top_n = None):
    '''variance of handcrafted feature groups explained by the best linear combinations of learned representation'''
    # For each handcrafted feature subgroup, compute:
    #   1. The group's total variance captured by linear combinations of the
    #      learned representation (cca_variance_analysis).
    #   2. Accuracy change (percentage points) from concatenating the whole
    #      group onto the learned representation (evaluate_mlp vs. baseline).
    # Arguments ending in "_t" are the held-out test split. `analyse_groups` is
    # not referenced in this body (kept for interface compatibility).
    # Returns the group labels ("name(kD)") ranked by shared variance,
    # descending, truncated to `top_n`.
    # Accuracy baseline: learned representation alone.
    baseline = evaluate_mlp(learned_representations, labels, learned_t, labels_t)
    canonical_coefs = dict()
    # ALL_HANDCRAFT_FEATURES_SUBGROUPS / SUBGROUP_FEATURE_IDXS are
    # notebook-level globals defined in an earlier section.
    for group in ALL_HANDCRAFT_FEATURES_SUBGROUPS.keys():
        feature_idxs = SUBGROUP_FEATURE_IDXS[group]
        shared_variance = cca_variance_analysis(handcraft_features[:,feature_idxs], learned_representations)
        # Learned features + this entire subgroup (train and test).
        features_train = np.concatenate([learned_representations, handcraft_features[:, feature_idxs]], axis = 1)
        features_test = np.concatenate([learned_t, hand_t[:,feature_idxs]], axis = 1)
        improvement = evaluate_mlp(features_train, labels, features_test, labels_t) - baseline
        improvement*=100  # express as percentage points
        canonical_coefs["{}({}D)".format(group, len(feature_idxs))] = [ shared_variance, improvement]
    # FIX: the ranking used to be computed only inside the plotting branch, so
    # create_heapmap=False raised NameError at the return statement. Compute it
    # unconditionally; plotting-path behavior is unchanged.
    top_n = top_n if top_n is not None else len(canonical_coefs)
    canonical_coefs = dict(sorted(canonical_coefs.items(), key=lambda item: -1*item[1][0])[:top_n])
    correlation_ranked_features = list(canonical_coefs.keys())
    if create_heapmap:
        plt.figure(figsize=(10, 5))
        table = pd.DataFrame(canonical_coefs, index = ['Shared variance', 'Accuracy improvement']).transpose()
        heatmap = sn.heatmap(table, vmin=0, vmax=1, annot=True, cmap='BrBG')
        # FIX: title previously had the two feature sets swapped ("learned
        # feature subgroups and handcrafted feature").
        heatmap.set_title("Variance shared between handcrafted feature subgroups and learned features", fontdict={'fontsize':12}, pad=12)
        plt.show()
    return correlation_ranked_features
# Single-feature analysis on the multiclass ("_multi") splits: correlation,
# canonical communality, and accuracy-improvement heatmap per handcrafted feature.
correlation_ranked_features = analyse_linear_combination_single_feature(learned_train_multi,hand_train_multi, labels_train_multi,learned_test_multi,hand_test_multi, labels_test_multi, create_heapmap = True)
Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations. Liblinear failed to converge, increase the number of iterations.
The first column is the Pearson correlation coefficient between the handcrafted feature and the linear mapping of the learned features. The second column is the canonical communality coefficient of the feature; it represents how much variance of the original feature was captured in all of the canonical functions, and informs one about how useful the observed variable was for the entire analysis. The third column is the improvement in prediction accuracy after concatenating the learned features with the single handcrafted feature. We take the average of the improvement in accuracy of three separate models: SVM, Decision Tree, and Multilayer Perceptron.
Note that the learned feature is 100-dimensional. We are modeling 1-dimensional features using 100-dimensional features, so it is easy to obtain a high correlation and hard to obtain a weak one. If a feature has a weak correlation with the learned features, it suggests that it models noise when considered as a singled-out feature.
Let's analyse feature groups as a whole and see how much variance they share with the learned features.
# Group-level analysis with the resnet ("learned") representation.
_ = analyse_linear_combination_feature_groups(learned_train,hand_train, labels_train,learned_test,hand_test, labels_test, create_heapmap = True, analyse_groups = True)
Maximum number of iterations reached Maximum number of iterations reached
The above results suggest the importance of low-level features in the task of emotion classification. The learned features correlate more with low-level features than with high-level features.
However, it is possible that all classification tasks depend on low-level features, not just emotion classification. It is also possible that low-level features are intrinsically encoded in the pixel values and get carried through by convnet models. To verify that this is not the case, we look at the correlation between the handcrafted features and the image representations from AlexNet and VGG16: these have smaller correlation coefficients with the handcrafted features than the image representations from the resnet model do. This suggests that our resnet model, trained on the task of emotion classification of paintings, retained more low-level information throughout.
# Same group-level analysis, but with AlexNet image representations.
_ = analyse_linear_combination_feature_groups(alex_train,hand_train, labels_train,alex_test,hand_test, labels_test, create_heapmap = True, analyse_groups = True)
Maximum number of iterations reached Maximum number of iterations reached
# Same group-level analysis, but with VGG16 image representations.
_ = analyse_linear_combination_feature_groups(vgg16_train,hand_train, labels_train, vgg16_test,hand_test, labels_test,create_heapmap = True, analyse_groups = True)
Maximum number of iterations reached Maximum number of iterations reached
We concatenate feature combinations and train a Multilayer Perceptron with 3 hidden layers, batch normalization, and dropout (0.2). We also run an SVM on the same datasets as a sanity check.
Remarks:
High-level features improve the accuracy of learned representations, suggesting the resnet model did not pick up all the relevant semantic information. This is not surprising, as the high-level features encode the artstyle, genre, and bbox of people, which are very hard to learn given an 80k dataset. More importantly, the task the resnet model was trained on was only emotion prediction.
Low-level features do not improve accuracy of learned representations as much as high-level features do, suggesting that they offer less new information to the learned representations.
The best accuracy is obtained by concatenating the learned representation with handcrafted features and AlexNet image representations. AlexNet is trained on ImageNet classification, and its image representations capture high-level semantic information.
# Accuracy comparison on the "_multi" splits: classifiers trained on various
# feature combinations. evaluate_mlp returns a single accuracy score; with
# verbose=True it also prints per-model accuracies (MLP / SVM / Decision Tree,
# as seen in the captured cell output).
# NOTE(review): "{:3f}" is width-3 fixed-point (default 6 decimals); "{:.3f}"
# was probably intended -- harmless, output just carries 6 decimal places.
t = evaluate_mlp(learned_train_multi, labels_train_multi, learned_test_multi, labels_test_multi, verbose=True)
print("MLP using learned features: {:3f}".format(t))
t= evaluate_mlp(hand_train_multi, labels_train_multi, hand_test_multi, labels_test_multi, verbose=True)
print("MLP using handcrafted features: {:3f}".format(t))
# Learned features + ALL low-level handcrafted features.
features_train = np.concatenate([learned_train_multi, hand_train_multi[:, SUBGROUP_FEATURE_IDXS["low"]]], axis = 1)
features_test = np.concatenate([learned_test_multi, hand_test_multi[:, SUBGROUP_FEATURE_IDXS["low"]]], axis = 1)
t= evaluate_mlp(features_train, labels_train_multi, features_test, labels_test_multi, verbose=True)
print("MLP using learned features + low-level handcrafted features: {:3f}".format(t))
# Learned features + one low-level subgroup at a time.
for f in LOW_LEVEL_FEATURES_SUBGROUPS.keys():
    features_train = np.concatenate([learned_train_multi, hand_train_multi[:, SUBGROUP_FEATURE_IDXS[f]]], axis = 1)
    features_test = np.concatenate([learned_test_multi, hand_test_multi[:, SUBGROUP_FEATURE_IDXS[f]]], axis = 1)
    t = evaluate_mlp(features_train, labels_train_multi, features_test, labels_test_multi)
    print("MLP using learned features + low-level handcrafted feature group {}: {:3f}".format(f, t))
# Learned features + ALL high-level handcrafted features.
features_train = np.concatenate([learned_train_multi, hand_train_multi[:, SUBGROUP_FEATURE_IDXS["high"]]], axis = 1)
features_test = np.concatenate([learned_test_multi, hand_test_multi[:, SUBGROUP_FEATURE_IDXS["high"]]], axis = 1)
t = evaluate_mlp(features_train, labels_train_multi, features_test, labels_test_multi, verbose=True)
print("MLP using learned features + high-level handcrafted features: {:3f}".format(t))
# Learned features + one high-level subgroup at a time.
for f in HIGH_LEVEL_FEATURES_SUBGROUPS.keys():
    features_train = np.concatenate([learned_train_multi, hand_train_multi[:, SUBGROUP_FEATURE_IDXS[f]]], axis = 1)
    features_test = np.concatenate([learned_test_multi, hand_test_multi[:, SUBGROUP_FEATURE_IDXS[f]]], axis = 1)
    t = evaluate_mlp(features_train, labels_train_multi, features_test, labels_test_multi)
    print("MLP using learned features + high-level handcrafted feature group {} : {:3f}".format(f, t))
# Learned features + all handcrafted features.
features_train = np.concatenate([learned_train_multi, hand_train_multi], axis = 1)
features_test = np.concatenate([learned_test_multi, hand_test_multi], axis = 1)
t= evaluate_mlp(features_train, labels_train_multi, features_test, labels_test_multi, verbose=True)
print("MLP using learned features + handcrafted features: {:3f}".format(t))
# AlexNet representations alone, then combined with the learned features,
# then all three feature sets together.
t=evaluate_mlp(alex_train_multi, labels_train_multi, alex_test_multi, labels_test_multi, verbose=True)
print("MLP using alex features: {:3f}".format(t))
features_train = np.concatenate([learned_train_multi, alex_train_multi], axis = 1)
features_test = np.concatenate([learned_test_multi, alex_test_multi], axis = 1)
t=evaluate_mlp(features_train, labels_train_multi, features_test, labels_test_multi, verbose=True)
print("MLP using alexnet features + learned features: {:3f}".format(t))
features_train = np.concatenate([learned_train_multi, hand_train_multi, alex_train_multi], axis = 1)
features_test = np.concatenate([learned_test_multi, hand_test_multi, alex_test_multi], axis = 1)
t=evaluate_mlp(features_train, labels_train_multi, features_test, labels_test_multi, verbose=True)
print("MLP using learned features + handcrafted features + alexnet features : {:3f}".format(t))
1.mlp acc: 0.312 2.svm acc 0.333 3.tree acc 0.227 MLP using learned features: 0.290385 1.mlp acc: 0.298 2.svm acc 0.354 3.tree acc 0.238 MLP using handcrafted features: 0.296795 1.mlp acc: 0.327 2.svm acc 0.319 3.tree acc 0.242 MLP using learned features + low-level handcrafted features: 0.296154 MLP using learned features + low-level handcrafted feature group saturation and brightness: 0.296154 MLP using learned features + low-level handcrafted feature group hue: 0.289103 MLP using learned features + low-level handcrafted feature group texture: 0.298718 MLP using learned features + low-level handcrafted feature group lines: 0.300000 MLP using learned features + low-level handcrafted feature group rule of third: 0.297436 MLP using learned features + low-level handcrafted feature group bilateral symmetry: 0.317949 MLP using learned features + low-level handcrafted feature group rotational symmetry: 0.293590 MLP using learned features + low-level handcrafted feature group radial symmetry: 0.292308 1.mlp acc: 0.354 2.svm acc 0.362 3.tree acc 0.279 MLP using learned features + high-level handcrafted features: 0.331410 MLP using learned features + high-level handcrafted feature group genre : 0.339103 MLP using learned features + high-level handcrafted feature group artstyle : 0.313462 MLP using learned features + high-level handcrafted feature group bbox : 0.303846 MLP using learned features + high-level handcrafted feature group faces and skin : 0.299359 1.mlp acc: 0.369 2.svm acc 0.383 3.tree acc 0.242 MLP using learned features + handcrafted features: 0.331410 1.mlp acc: 0.371 2.svm acc 0.392 3.tree acc 0.206 MLP using alex features: 0.323077 1.mlp acc: 0.367 2.svm acc 0.373 3.tree acc 0.244 MLP using alexnet features + learned features: 0.328205 1.mlp acc: 0.375 2.svm acc 0.390 3.tree acc 0.242 MLP using learned features + handcrafted features + alexnet features : 0.335897
# Same comparison on the non-"_multi" splits.
# NOTE(review): the banner says MULTICLASS, but these calls use the splits
# WITHOUT the "_multi" suffix (accuracies ~0.7 here vs ~0.3 above) -- confirm
# which split is actually the multiclass one and that the banner matches.
print("MULTICLASS CLASSIFICATION")
t = evaluate_mlp(learned_train, labels_train, learned_test, labels_test, verbose=True)
print("MLP using learned features: {:3f}".format(t))
t= evaluate_mlp(hand_train, labels_train, hand_test, labels_test, verbose=True)
print("MLP using handcrafted features: {:3f}".format(t))
# Learned features + all handcrafted features.
features_train = np.concatenate([learned_train, hand_train], axis = 1)
features_test = np.concatenate([learned_test, hand_test], axis = 1)
t= evaluate_mlp(features_train, labels_train, features_test, labels_test, verbose=True)
print("MLP using learned features + handcrafted features: {:3f}".format(t))
1.mlp acc: 0.707 2.svm acc 0.714 3.tree acc 0.641 MLP using learned features: 0.687314 1.mlp acc: 0.709 2.svm acc 0.716 3.tree acc 0.660 MLP using handcrafted features: 0.694977 1.mlp acc: 0.715 2.svm acc 0.722 3.tree acc 0.645 MLP using learned features + handcrafted features: 0.693912